{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Installation and imports"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Requirement already up-to-date: kfmd in ./.local/lib/python3.6/site-packages (0.1.8)\n",
      "Requirement already up-to-date: pandas in ./.local/lib/python3.6/site-packages (1.0.1)\n",
      "Requirement already satisfied, skipping upgrade: python-dateutil>=2.6.1 in /usr/local/lib/python3.6/dist-packages (from pandas) (2.8.1)\n",
      "Requirement already satisfied, skipping upgrade: numpy>=1.13.3 in /usr/local/lib/python3.6/dist-packages (from pandas) (1.18.1)\n",
      "Requirement already satisfied, skipping upgrade: pytz>=2017.2 in /usr/local/lib/python3.6/dist-packages (from pandas) (2019.3)\n",
      "Requirement already satisfied, skipping upgrade: six>=1.5 in /usr/lib/python3/dist-packages (from python-dateutil>=2.6.1->pandas) (1.11.0)\n"
     ]
    }
   ],
   "source": [
    "!pip install kfmd --upgrade --user\n",
    "!pip install pandas --upgrade --user\n",
    "\n",
    "from kfmd import metadata\n",
    "import pandas\n",
    "from datetime import datetime\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Create a workspace, run and execution"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "ws1 = metadata.Workspace(\n",
    "    # Connect to metadata-service in namesapce kubeflow in k8s cluster.\n",
    "    backend_url_prefix=\"metadata-service.kubeflow.svc.cluster.local:8080\",\n",
    "    name=\"ws1\",\n",
    "    description=\"a workspace for testing\",\n",
    "    labels={\"n1\": \"v1\"})\n",
    "r = metadata.Run(\n",
    "    workspace=ws1,\n",
    "    name=\"run-\" + datetime.utcnow().isoformat(\"T\") ,\n",
    "    description=\"a run in ws_1\",\n",
    ")\n",
    "exec = metadata.Execution(\n",
    "    name = \"execution\" + datetime.utcnow().isoformat(\"T\") ,\n",
    "    workspace=ws1,\n",
    "    run=r,\n",
    "    description=\"execution example\",\n",
    ")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Log data set, model and its evaluation"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "data_set = exec.log_input(\n",
    "        metadata.DataSet(\n",
    "            description=\"an example data\",\n",
    "            name=\"mytable-dump\",\n",
    "            owner=\"owner@my-company.org\",\n",
    "            uri=\"file://path/to/dataset\",\n",
    "            version=\"v1.0.0\",\n",
    "            query=\"SELECT * FROM mytable\"))\n",
    "model = exec.log_output(\n",
    "    metadata.Model(\n",
    "            name=\"MNIST\",\n",
    "            description=\"model to recognize handwritten digits\",\n",
    "            owner=\"someone@kubeflow.org\",\n",
    "            uri=\"gcs://my-bucket/mnist\",\n",
    "            model_type=\"neural network\",\n",
    "            training_framework={\n",
    "                \"name\": \"tensorflow\",\n",
    "                \"version\": \"v1.0\"\n",
    "            },\n",
    "            hyperparameters={\n",
    "                \"learning_rate\": 0.5,\n",
    "                \"layers\": [10, 3, 1],\n",
    "                \"early_stop\": True\n",
    "            },\n",
    "            version=\"v0.0.1\",\n",
    "            labels={\"mylabel\": \"l1\"}))\n",
    "metrics = exec.log_output(\n",
    "    metadata.Metrics(\n",
    "            name=\"MNIST-evaluation\",\n",
    "            description=\"validating the MNIST model to recognize handwritten digits\",\n",
    "            owner=\"someone@kubeflow.org\",\n",
    "            uri=\"gcs://my-bucket/mnist-eval.csv\",\n",
    "            data_set_id=data_set.id,\n",
    "            model_id=model.id,\n",
    "            metrics_type=metadata.Metrics.VALIDATION,\n",
    "            values={\"accuracy\": 0.95},\n",
    "            labels={\"mylabel\": \"l1\"}))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "List all the models in the workspace"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>id</th>\n",
       "      <th>workspace</th>\n",
       "      <th>run</th>\n",
       "      <th>create_time</th>\n",
       "      <th>description</th>\n",
       "      <th>model_type</th>\n",
       "      <th>name</th>\n",
       "      <th>owner</th>\n",
       "      <th>version</th>\n",
       "      <th>uri</th>\n",
       "      <th>training_framework</th>\n",
       "      <th>hyperparameters</th>\n",
       "      <th>labels</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>8</td>\n",
       "      <td>ws1</td>\n",
       "      <td>run-2020-02-18T00:48:10.734939</td>\n",
       "      <td>2020-02-18T00:48:13.273533Z</td>\n",
       "      <td>model to recognize handwritten digits</td>\n",
       "      <td>neural network</td>\n",
       "      <td>MNIST</td>\n",
       "      <td>someone@kubeflow.org</td>\n",
       "      <td>v0.0.1</td>\n",
       "      <td>gcs://my-bucket/mnist</td>\n",
       "      <td>{'name': 'tensorflow', 'version': 'v1.0'}</td>\n",
       "      <td>{'learning_rate': 0.5, 'layers': [10, 3, 1], '...</td>\n",
       "      <td>{'mylabel': 'l1'}</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  id workspace                             run                  create_time  \\\n",
       "0  8       ws1  run-2020-02-18T00:48:10.734939  2020-02-18T00:48:13.273533Z   \n",
       "\n",
       "                             description      model_type   name  \\\n",
       "0  model to recognize handwritten digits  neural network  MNIST   \n",
       "\n",
       "                  owner version                    uri  \\\n",
       "0  someone@kubeflow.org  v0.0.1  gcs://my-bucket/mnist   \n",
       "\n",
       "                          training_framework  \\\n",
       "0  {'name': 'tensorflow', 'version': 'v1.0'}   \n",
       "\n",
       "                                     hyperparameters             labels  \n",
       "0  {'learning_rate': 0.5, 'layers': [10, 3, 1], '...  {'mylabel': 'l1'}  "
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "pandas.DataFrame.from_dict(ws1.list(metadata.Model.ARTIFACT_TYPE_NAME))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Get basic lineage"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "model id is 8\n",
      "\n"
     ]
    }
   ],
   "source": [
    "print(\"model id is %s\\n\" % model.id)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Find the execution that produces this model."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "3\n"
     ]
    }
   ],
   "source": [
    "output_events = ws1.client.list_events2(model.id).events\n",
    "assert len(output_events) == 1\n",
    "execution_id = output_events[0].execution_id\n",
    "print(execution_id)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Find all events related to that execution."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "All events related to this model:\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>artifact_id</th>\n",
       "      <th>execution_id</th>\n",
       "      <th>path</th>\n",
       "      <th>type</th>\n",
       "      <th>milliseconds_since_epoch</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>7</td>\n",
       "      <td>3</td>\n",
       "      <td>None</td>\n",
       "      <td>INPUT</td>\n",
       "      <td>1581986893248</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>8</td>\n",
       "      <td>3</td>\n",
       "      <td>None</td>\n",
       "      <td>OUTPUT</td>\n",
       "      <td>1581986893273</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>9</td>\n",
       "      <td>3</td>\n",
       "      <td>None</td>\n",
       "      <td>OUTPUT</td>\n",
       "      <td>1581986893298</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  artifact_id execution_id  path    type milliseconds_since_epoch\n",
       "0           7            3  None   INPUT            1581986893248\n",
       "1           8            3  None  OUTPUT            1581986893273\n",
       "2           9            3  None  OUTPUT            1581986893298"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "all_events = ws1.client.list_events(execution_id).events\n",
    "assert len(all_events) == 3\n",
    "print(\"\\nAll events related to this model:\")\n",
    "pandas.DataFrame.from_dict([e.to_dict() for e in all_events])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.9"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
